library(microViz)
## 
## microViz version 0.9.3 - Copyright (C) 2021 David Barnett
## * Website: https://david-barnett.github.io/microViz/
## * Useful? For citation info, run: citation('microViz')
## * Silence: suppressPackageStartupMessages(library(microViz))
pacman::p_load(tidyverse, phyloseq, magrittr, janitor, microbiome, knitr, lubridate, naniar, readxl, ggplot2, ggpubr)

library(dendextend)
## Warning: package 'dendextend' was built under R version 4.1.2
## Registered S3 method overwritten by 'dendextend':
##   method     from 
##   rev.hclust vegan
## 
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following object is masked from 'package:stats':
## 
##     cutree
library(vegan)
## Warning: package 'vegan' was built under R version 4.1.2
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.1.2
## 
## Attaching package: 'permute'
## The following object is masked from 'package:dendextend':
## 
##     shuffle
## Loading required package: lattice
## This is vegan 2.6-4
## 
## Attaching package: 'vegan'
## The following object is masked from 'package:microbiome':
## 
##     diversity
ps_full <- readRDS("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/rds_files/ps_IMProveCF_patientsAndControls_Run1-23_18102023.rds")

source("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/IMMProveCF/functions_full.R")

visit_sum_palette <- c("black", "#C3BC3FFF", "#6388B4FF", "#BB7693FF", "#55AD89FF", "#8176AA")

dom_palette <- c(Streptococcus="#69b3a2", Staphylococcus = "#8175AA", Fusobacterium="#6FB899FF", Prevotella_7="#31A1B3FF", Rothia="#027B8EFF", Pseudomonas="#EE6AA7", Eikenella="#94D0C0FF", Haemophilus="#CCB22BFF", Achromobacter="#9F8F12FF", Gemella= "#97CFD0" , Moraxella= "#6FB899", `missing sample` = "#CFCFCF", `Burkholderia-Caballeronia-Paraburkholderia` = "#E5C494")
ps_full_relab <- transform_sample_counts(ps_full, function(x) x/sum(x)*100)

#subset materials
ps_full_throat <- subset_samples(ps_full_relab, material== "Throat")
ps_full_stool <- subset_samples(ps_full_relab, material== "Stool")

ps_full_throat_glom <- tax_glom(ps_full_throat, taxrank = "Genus")
ps_full_stool_glom <- tax_glom(ps_full_stool, taxrank = "Genus")

1 Analysis on most abundant taxa per patient in throat

#Get top taxa per patient
#find.top.taxa2 is sourced from functions.R
top.throat<- find.top.taxa2(ps_full_throat_glom, "Genus",1)
top.throat$Species<- NULL

rslt <- top.throat[, "taxa"]
dd <- matrix(unlist(rslt), nrow=1)
colnames(dd) <- rownames(top.throat)
top.throat <- t(dd)

top.throat_df <- data.frame(x1 = row.names(top.throat), top.throat)%>%
  mutate(dominantGenus = top.throat)
top.throat_df$top.throat<- NULL

# add clinical data to p

##Add dominant Genus to ps_full_throat_glom sample data
ps_full_throat_glom_j <- microViz::ps_join(ps_full_throat_glom, top.throat_df, by = "x1")

##Add dominant Genus to ps_full_throat sample data
ps_full_throat_j <- microViz::ps_join(ps_full_throat, top.throat_df, by = "x1")

# Control's dominant taxa
summary(as_factor(sample_data(ps_full_throat_j)$dominantGenus[sample_data(ps_full_throat_j)$project=="IMMPIMMP"]))
## Streptococcus   Veillonella     Neisseria Fusobacterium   Haemophilus 
##            32             2             3             1             1
# Patients's dominant taxa
summary(as_factor(sample_data(ps_full_throat_j)$dominantGenus[sample_data(ps_full_throat_j)$project!="IMMPIMMP"]))
##        Prevotella_7       Streptococcus      Stomatobaculum             Gemella 
##                  32                 109                   1                   2 
##       Fusobacterium        Leptotrichia         Veillonella         Actinomyces 
##                   9                  10                  15                   1 
##         Haemophilus           Neisseria              Rothia      Actinobacillus 
##                   4                  13                   1                   2 
##          Prevotella        Oribacterium Lachnoanaerobaculum                NA's 
##                   3                   1                   1                   9
 ### plot principal component analysis by dominant Genus
PCA_all_visit_throat_dominantGenus <- plot_ordination(ps_full_throat_j, ordinate(ps_full_throat_j, "MDS"), color = "dominantGenus", shape="visit_sum")

xlabels <- c("0","3","6-12","15-18","21-24","Control")

PCA_all_visit_throat_dominantGenus+
  geom_line(aes(group=id.x), color="grey")+
  geom_point(size = 4)+
  ggtitle("Variation by dominant Genus")+
  scale_color_manual(values = dom_palette)+
  scale_shape_manual(values = c(15, 16, 17, 18, 12, 23),labels = xlabels)+
  theme_bw()+
  theme(text=element_text(size=24), legend.position = "right", legend.text = element_text(size=16), legend.title = element_text(size=18))+
  guides(color = guide_legend(title = "dominant genus"))+
   guides(shape = guide_legend(title = "months from treatment start"))

### plot principal component analysis by visit_sum
PCA_all_visit_throat_dominantGenus <- plot_ordination(ps_full_throat_j, ordinate(ps_full_throat_j, "MDS"), color = "visit_sum", shape="project")

project_label <- c("Control","CF")

xlabels <- c("0", "3", "6-12", "15-18", "21-24", "Control")

p1 <- PCA_all_visit_throat_dominantGenus +
  geom_point(size = 4, alpha = 0.7) +
  scale_color_manual(values = visit_sum_palette, labels = xlabels) +
  scale_shape_manual(values = c(15, 16), labels = project_label) +
  theme_classic() +
  theme(text = element_text(size = 18), legend.position = "none", legend.text = element_text(size = 16), legend.title = element_blank()) +
  guides(color = guide_legend(title = "Months from treatment start", title.position = "left", ncol = 2))+
  stat_ellipse()
p1

p1_leg <- PCA_all_visit_throat_dominantGenus +
  geom_point(size = 4, alpha = 0.7) +
  scale_color_manual(values = visit_sum_palette, labels = xlabels) +
  scale_shape_manual(values = c(15, 16), labels = project_label) +
  theme_classic() +
  theme(text = element_text(size = 16), legend.position = "bottom", legend.text = element_text(size = 16)) +
  guides(color = guide_legend(title = "Months from treatment start", title.position = "left", ncol = 6),shape = guide_legend(title = "", title.position = "left", ncol = 2))+
  stat_ellipse()
p1_leg

#ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/beta_throat_controls.png", width = 10, height = 6)

2 create density plots for axis 1 and 2

# Perform PCoA
ord_obj <- ordinate(ps_full_throat_j, "MDS")

# Extract coordinates from PCoA
pcoa_coords <- ord_obj$vectors

# Create a data frame for ggplot
df <- data.frame(x = pcoa_coords[, 1], y = pcoa_coords[, 2], visit_sum = sample_data(ps_full_throat_j)$visit_sum)

# Create density plots on the x and y axes
plot_x_th <- ggplot(df, aes(x = x, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.5) +
  theme_classic() +
  #clean_theme()+
  scale_x_continuous(breaks = c(-0.3, 0, 0.3), labels = c("-0.3", "0", "0.3"))+
  scale_fill_manual(values = visit_sum_palette, labels = xlabels)+
  theme(legend.position = "none", text = element_text(size = 18), legend.title = element_blank())


plot_y_th <- ggplot(df, aes(x = y, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.5) +
  theme_classic() +
  #clean_theme()+
  xlim(c(-0.6, 0.4))+
  scale_fill_manual(values = visit_sum_palette, labels = xlabels)+
  theme(legend.position = "none",text = element_text(size = 18), legend.title = element_blank())+
  coord_flip()

# Extract the legend. Returns a gtable
leg <- get_legend(p1_leg)

# Convert to a ggplot and print
legend <- as_ggplot(leg)

# Arranging the plot
throat_density <- ggarrange(plot_x_th, NULL, p1, plot_y_th, 
          ncol = 2, nrow = 2,  align = "hv", 
          widths = c(2, 1), heights = c(1, 2),
          common.legend = F)
throat_density

3 PERMANOVA in throat

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_throat_j,
                             method="bray", weighted=F)
#extract metadata
metadata<- as(sample_data(ps_full_throat_j),"data.frame")

### stratified PERMANOVA per visit group
adonis2(formula = BC_dist ~ visit_sum + sex + age_y, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
### stratified PERMANOVA health vs CF
adonis2(formula = BC_dist ~ project + sex + age_y, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)

4 compare CF and control BC-distances for throat

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_throat_j,
                             method="bray", weighted=F)
BC_dist.throat <- as.matrix(BC_dist)
#BC_dist.throat[lower.tri(BC_dist.throat)] <- 0  # this is needed to remove the 0 latter, to keep only single pairs
BC_dist.throat_df <- reshape2::melt(BC_dist.throat)

tmp1 <- BC_dist.throat_df%>%
  filter(grepl("IMMP", Var1)) %>% #this keeps only control samples in Var 1
  filter(!grepl("IMMP", Var2))#%>%  #this filters control samples from Var 2, so that in the end only distances between COntrols and all CF samples are kept
  #filter(value!=0)#this removes the same sample distances, and the one from set 0 from above to keep a single pair

# merge with metadata to have visit_sums for CF samples
#extract metadata
metadata<- as(sample_data(ps_full_throat_j),"data.frame")
tmp1 <- tmp1 %>% 
  left_join(metadata, by=c("Var2"="x_sample_id")) %>% 
  mutate(comparison=paste(Var1, Var2, sep = "_")) %>% 
  distinct(comparison, .keep_all = T)

summary(tmp1$visit_sum)
##       1       2     3-5     6-7    8-10 Control 
##     819    1131    3120    1677    1560       0
my_comp <- list(c("1","2"), c("1","3-5"), c("1","6-7"), c("1","8-10"))

# Calculate median and IQR
summary_stats <- tmp1 %>%
  group_by(visit_sum) %>%
  summarize(
    median = median(value),
    lower = quantile(value, 0.25),
    upper = quantile(value, 0.75)
  )

tmp1 %>% 
  ggplot(aes(visit_sum, value, fill=visit_sum))+
  geom_boxplot(outlier.shape = NA, alpha=0.7)+
  #geom_point()+
  theme_classic()+
  scale_fill_manual(values = visit_sum_palette)+
  ylab("BC distance between CF and Controls")+
  theme(legend.position = "none",text=element_text(size=20))+ #
  scale_x_discrete(labels= xlabels)+
  xlab("")+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.adj")

ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/BC_throat_controls.png", width = 8, height = 6)

summary(lmerTest::lmer(value~visit_sum + (1|Var1)+ (1|id.x), data = tmp1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ visit_sum + (1 | Var1) + (1 | id.x)
##    Data: tmp1
## 
## REML criterion at convergence: -14544.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.3008 -0.6450  0.0106  0.6368  3.6986 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Var1     (Intercept) 0.003130 0.05595 
##  id.x     (Intercept) 0.004097 0.06401 
##  Residual             0.009735 0.09867 
## Number of obs: 8307, groups:  Var1, 39; id.x, 35
## 
## Fixed effects:
##                 Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)    6.889e-01  1.451e-02  7.477e+01  47.497  < 2e-16 ***
## visit_sum2    -4.180e-02  4.633e-03  8.239e+03  -9.023  < 2e-16 ***
## visit_sum3-5  -2.638e-02  4.155e-03  8.263e+03  -6.350 2.27e-10 ***
## visit_sum6-7  -3.761e-02  4.613e-03  8.264e+03  -8.154 4.02e-16 ***
## visit_sum8-10 -1.613e-02  4.681e-03  8.264e+03  -3.445 0.000574 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vst_s2 vs_3-5 vs_6-7
## visit_sum2  -0.190                     
## visit_sm3-5 -0.221  0.675              
## visit_sm6-7 -0.204  0.622  0.760       
## vist_sm8-10 -0.202  0.620  0.753  0.718
# below I plot the BC distance per patient and each control, with controls being on the x-axis, one can observe the consistency of BC distance per patient across controls, with visit_sum giving a distinct signal in each patient
tmp1 %>% 
  ggplot(aes(Var1, value, fill=visit_sum))+
  #geom_boxplot(outlier.shape = NA)+
  geom_point(aes(color=visit_sum), alpha=0.5)+
  theme_classic()+
  scale_color_manual(values = visit_sum_palette)+
  ylab("BC distance between CF and Controls")+
  theme(legend.position = "none",text=element_text(size=20))+ #
  #scale_x_discrete(labels= xlabels)+
  xlab("")+
  facet_wrap(~id.x)

5 throat linear model + boxplot

lm_boxplot_function <- function(dependent_variable, ylabel) {

  # Fit linear mixed-effects model
  print(summary(lmerTest::lmer(paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)"), data = tmp1)))
  lm <- summary(lmerTest::lmer(paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)"), data = tmp1))
  #lm <- summary(lmerTest::lmer(value ~ visit_sum + sex + age_y + (1|id), data = tmp1))
  # Extract coefficients and adjust p-values
  coefs <- data.frame(coef(lm))
  fdr <- p.adjust(coefs$Pr...t.., method = "fdr", n = nrow(coefs))
  
  # Create lm_stats table
  lm_stats <- bind_cols(coefs, fdr) %>%
    mutate(p = Pr...t..) %>%
    mutate(fdr = ...6) %>%
    select(-c(Pr...t.., ...6)) %>%
    rownames_to_column() %>%
    mutate(Months_after_ETI_start = rowname) %>%
    mutate(Months_after_ETI_start = case_when(
      Months_after_ETI_start == "(Intercept)" ~ "Baseline (Intercept)",
      Months_after_ETI_start == "visit_sum2" ~ "3 months",
      Months_after_ETI_start == "visit_sum3-5" ~ "6-12 months",
      Months_after_ETI_start == "visit_sum6-7" ~ "15-18 months",
      Months_after_ETI_start == "visit_sum8-10" ~ "21-24 months",
      Months_after_ETI_start %in% c("sex2", "age_y") ~ Months_after_ETI_start
    )) %>%
    mutate(fdr_star = case_when(
      fdr <= 0.001 ~ "***",
      fdr <= 0.01 ~ "**",
      fdr <= 0.05 ~ "*",
      fdr <= 0.1 ~ ".",
      fdr >= 0.1 ~ "ns"
    )) %>%
    select(-rowname) %>%
    select(Months_after_ETI_start, Estimate, Std..Error, df, t.value, p, fdr, fdr_star) %>%
    mutate(p = round(p, 5)) %>%
    mutate(fdr = round(fdr, 5))
  
  # Print lm_stats table
  print(lm_stats)
  
  # Save lm_stats table as HTML
  sjPlot::tab_df(lm_stats, title = paste("lmer(", dependent_variable, " ~ visit_sum + sex + age_y + (1|id), data=tmp1)"), file = paste("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/table_outputs/", dependent_variable, "_table.html", sep = ""))
  
  # Print boxplot
  group1 <- c("(Intercept)", rep("1", nrow(lm_stats) - 1))
  
  lm_sig<- lm_stats %>%
    bind_cols(group1) %>%
    mutate(group1 = ...9) %>%
    mutate(group2 = case_when(
      Months_after_ETI_start == "Baseline (Intercept)" ~ "1",
      Months_after_ETI_start == "3 months" ~ "2",
      Months_after_ETI_start == "6-12 months" ~ "3-5",
      Months_after_ETI_start == "15-18 months" ~ "6-7",
      Months_after_ETI_start == "21-24 months" ~ "8-10",
      Months_after_ETI_start %in% c("sex2", "age_y") ~ Months_after_ETI_start
    )) %>%
    filter(group1 != "(Intercept)") %>%
    filter(Months_after_ETI_start!="sex2") %>% 
    filter(Months_after_ETI_start!="age_y")
  
  max_y <- max(tmp1[[dependent_variable]], na.rm = TRUE)
  min_y <- min(tmp1[[dependent_variable]], na.rm = TRUE)
  
 # Calculate the adjusted y.position for stat labels
stat_labels_y_position <- max_y + 0.1 * (max_y - min_y)

  xlabels <- c("0", "3", "6-12", "15-18", "21-24")
  
# Print boxplot
boxplot <- tmp1 %>%
  ggplot(aes(get(dependent_variable), x = visit_sum)) +
  geom_boxplot(aes(fill = visit_sum), outlier.shape = NA, alpha = 0.7) +
  scale_fill_manual(values = visit_sum_palette) +
  theme_classic() +
  ylab(ylabel) +
  xlab("Months from ETI treatment start") +
  scale_x_discrete(labels = xlabels) +
  theme(text = element_text(size = 18), legend.position = "none") +
   stat_pvalue_manual(lm_sig, label = "fdr_star", y.position = stat_labels_y_position, step.increase = 0.05)

return(list(lm_stats = lm_stats, boxplot = boxplot))
}

throat <- lm_boxplot_function("value", "Throat: BC distance - CF & Control")
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)")
##    Data: tmp1
## 
## REML criterion at convergence: -14534.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2896 -0.6446  0.0092  0.6347  3.7029 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Var1     (Intercept) 0.003130 0.05595 
##  id       (Intercept) 0.003788 0.06155 
##  Residual             0.009731 0.09864 
## Number of obs: 8307, groups:  Var1, 39; id, 35
## 
## Fixed effects:
##                 Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)    6.296e-01  2.462e-02  4.433e+01  25.575  < 2e-16 ***
## visit_sum2    -4.221e-02  4.634e-03  8.252e+03  -9.109  < 2e-16 ***
## visit_sum3-5  -2.777e-02  4.191e-03  7.721e+03  -6.625 3.70e-11 ***
## visit_sum6-7  -4.039e-02  4.746e-03  5.152e+03  -8.510  < 2e-16 ***
## visit_sum8-10 -1.978e-02  4.910e-03  3.026e+03  -4.029 5.74e-05 ***
## sex2           2.901e-02  2.111e-02  3.049e+01   1.374   0.1795    
## age_y          1.801e-03  7.411e-04  3.462e+01   2.431   0.0204 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vst_s2 vs_3-5 vs_6-7 v_8-10 sex2  
## visit_sum2  -0.086                                   
## visit_sm3-5 -0.035  0.673                            
## visit_sm6-7  0.050  0.612  0.763                     
## vist_sm8-10  0.098  0.601  0.751  0.736              
## sex2        -0.351  0.001  0.012  0.022  0.031       
## age_y       -0.691 -0.035 -0.133 -0.237 -0.303 -0.119
## New names:
## • `` -> `...6`
##   Months_after_ETI_start     Estimate   Std..Error         df   t.value       p
## 1   Baseline (Intercept)  0.629587955 0.0246168850   44.33130 25.575452 0.00000
## 2               3 months -0.042214097 0.0046345369 8252.29314 -9.108590 0.00000
## 3            6-12 months -0.027768307 0.0041912536 7721.41521 -6.625299 0.00000
## 4           15-18 months -0.040386983 0.0047459917 5152.04960 -8.509704 0.00000
## 5           21-24 months -0.019781584 0.0049098627 3025.76258 -4.028949 0.00006
## 6                   sex2  0.029005377 0.0211132808   30.49098  1.373798 0.17952
## 7                  age_y  0.001801253 0.0007410551   34.61843  2.430660 0.02040
##       fdr fdr_star
## 1 0.00000      ***
## 2 0.00000      ***
## 3 0.00000      ***
## 4 0.00000      ***
## 5 0.00008      ***
## 6 0.17952       ns
## 7 0.02380        *
## New names:
## • `` -> `...9`
p2 <- throat$boxplot
p2

throat$lm_stats
library(coefplot)
## Warning: package 'coefplot' was built under R version 4.1.2
# Fit your linear mixed-effects model
# For example, replace 'YourModel' and 'YourData' with your model and data, id is the CF patient id, and Var1 encodes the control id, by controlling for the pairs between CF patients and controls, implosion is controlled
model <-  lmerTest::lmer(value ~ visit_sum + sex + age_y + (1|id) + (1|Var1), data = tmp1)

# Extract coefficients
coefficients <- coef(model)

# Plot coefficients
  coef_p <- coefplot::coefplot(model, intercept = FALSE, color = "black")
  coef_p_t <- coef_p+
    theme_classic()+
    scale_y_discrete(labels= c("3 m", "6-12 m", "15-18 m", "21-24 m", "sex", "age"))+
    coord_flip()+
    xlab("Estimate")+
    ggtitle("")+
    ylab("")+
    theme(text = element_text(size = 18))
  coef_p_t

ggarrange(p1,p2, widths = c(2:1))

#ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/BetaandBC_throat_controls.png", width = 13, height = 5.5)

6 Analysis on most abundant taxa per patient in stool

#Get top taxa per patient
#find.top.taxa2 is sourced from functions.R
top.stool<- find.top.taxa2(ps_full_stool_glom, "Genus",1)
top.stool$Species<- NULL

rslt <- top.stool[, "taxa"]
dd <- matrix(unlist(rslt), nrow=1)
colnames(dd) <- rownames(top.stool)
top.stool <- t(dd)

top.stool_df <- data.frame(x1 = row.names(top.stool), top.stool)%>%
  mutate(dominantGenus = top.stool)
top.stool_df$top.stool<- NULL

# add clinical data to p

##Add dominant Genus to ps_full_stool_glom sample data
ps_full_stool_glom_j <- microViz::ps_join(ps_full_stool_glom, top.stool_df, by = "x1")

##Add dominant Genus to ps_full_stool_glom sample data
ps_full_stool_j <- microViz::ps_join(ps_full_stool, top.stool_df, by = "x1")

### plot principal component analysis by visit_sum
PCA_all_visit_stool_dominantGenus <- plot_ordination(ps_full_stool_j, ordinate(ps_full_stool_j, "MDS"), color = "visit_sum", shape="project")

project_label <- c("Control","CF")

p3 <- PCA_all_visit_stool_dominantGenus +
  geom_point(size = 4, alpha = 0.7) +
  scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  scale_color_manual(values = visit_sum_palette, labels = xlabels) +
  scale_shape_manual(values = c(15, 16), labels = project_label) +
  theme_classic() +
  theme(text = element_text(size = 18), legend.position = "none", legend.text = element_text(size = 16), legend.title = element_blank()) +
  stat_ellipse()
p3

#ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/beta_stool_controls.png", width = 10, height = 6)

7 create density plots for axis 1 and 2 stool

# Perform PCoA
ord_obj <- ordinate(ps_full_stool_j, "MDS")

# Extract coordinates from PCoA
pcoa_coords <- ord_obj$vectors

# Create a data frame for ggplot
df <- data.frame(x = pcoa_coords[, 1], y = pcoa_coords[, 2], visit_sum = sample_data(ps_full_stool_j)$visit_sum)

# Create density plots on the x and y axes
plot_x <- ggplot(df, aes(x = x, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.5) +
  theme_classic() +
  #clean_theme()+
  scale_y_continuous(breaks = c(0,1.0,2.0,3.0,4.0), labels = c("0.0", "1.0", "2.0","3.0","4.0"))+
  scale_fill_manual(values = visit_sum_palette, labels = xlabels)+
  theme(legend.position = "none", text = element_text(size = 18), legend.title = element_blank())

plot_y <- ggplot(df, aes(x = y, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.5) +
  theme_classic() +
  #clean_theme()+
  #xlim(c(-0.6, 0.4))+
  scale_fill_manual(values = visit_sum_palette, labels = xlabels)+
  theme(legend.position = "none", text = element_text(size = 18), legend.title = element_blank())+
  coord_flip()

# Extract the legend. Returns a gtable
leg <- get_legend(p1)

# Convert to a ggplot and print
legend <- as_ggplot(leg)

# Combine the density plots with the PCA plot
# Arranging the plot
stool_density <- ggarrange(plot_x, NULL, p3, plot_y, 
          ncol = 2, nrow = 2,  align = "hv", 
          widths = c(2, 1), heights = c(1, 2),
          common.legend = F)
stool_density 

8 Check nutrition and stool consistency on stool beta-diversity

### plot principal component analysis by nutrition
PCA_all_visit_stool_nutrition <- plot_ordination(ps_full_stool_j, ordinate(ps_full_stool_j, "MDS"), color = "basic_nutrition", shape="project")

project_label <- c("Control","CF")

PCA_all_visit_stool_nutrition+
  geom_point(size = 4)+
  ggtitle("Variation by time point with healthy controls")+
  #scale_color_manual(values = visit_sum_palette)+
  scale_shape_manual(values = c(23,15),labels = project_label)+
  theme_bw()+
  theme(text=element_text(size=24), legend.position = "right", legend.text = element_text(size=16), legend.title = element_text(size=18))+
  guides(color = guide_legend(title = "time points"))+
   guides(shape = guide_legend(title = "CF vs Control"))+
  stat_ellipse()
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Warning: Removed 3 rows containing missing values (`geom_path()`).

### plot principal component analysis by bristol
PCA_all_visit_stool_bristol <- plot_ordination(ps_full_stool_j, ordinate(ps_full_stool_j, "MDS"), color = "bristol", shape="project")

PCA_all_visit_stool_bristol+
  geom_point(size = 4)+
  ggtitle("Variation by time point with healthy controls")+
  #scale_color_manual(values = visit_sum_palette)+
  scale_shape_manual(values = c(23,15),labels = project_label)+
  theme_bw()+
  theme(text=element_text(size=24), legend.position = "right", legend.text = element_text(size=16), legend.title = element_text(size=18))+
  guides(color = guide_legend(title = "Bristol stool scale"))+
   guides(shape = guide_legend(title = "CF vs Control"))+
  stat_ellipse()
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Warning: Removed 2 rows containing missing values (`geom_path()`).

 #calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_stool_j,
                             method="bray", weighted=F)
#extract metadata
metadata<- as(sample_data(ps_full_stool_j),"data.frame")
metadata$id.x
##   [1] "11"   "11"   "11"   "11"   "11"   "12"   "12"   "12"   "12"   "13"  
##  [11] "13"   "15"   "15"   "15"   "15"   "16"   "16"   "16"   "16"   "17"  
##  [21] "17"   "17"   "17"   "17"   "18"   "18"   "18"   "18"   "19"   "19"  
##  [31] "19"   "19"   "20"   "20"   "20"   "21"   "21"   "21"   "21"   "22"  
##  [41] "22"   "23"   "24"   "24"   "24"   "24"   "25"   "25"   "25"   "25"  
##  [51] "26"   "26"   "26"   "26"   "27"   "27"   "27"   "28"   "28"   "29"  
##  [61] "29"   "29"   "29"   "30"   "30"   "30"   "30"   "30"   "31"   "31"  
##  [71] "31"   "31"   "32"   "32"   "32"   "32"   "33"   "35"   "35"   "36"  
##  [81] "5"    "5"    "5"    "5"    "5"    "6"    "6"    "6"    "6"    "8"   
##  [91] "8"    "8"    "8"    "8"    "9"    "9"    "9"    "26"   "32"   "36"  
## [101] "9"    "11"   "11"   "15"   "15"   "16"   "17"   "18"   "21"   "21"  
## [111] "23"   "23"   "23"   "25"   "25"   "28"   "28"   "29"   "29"   "30"  
## [121] "31"   "32"   "33"   "36"   "37"   "38"   "39"   "40"   "41"   "5"   
## [131] "6"    "6"    "8"    "8"    "9"    "11"   "15"   "17"   "17"   "18"  
## [141] "19"   "19"   "21"   "21"   "23"   "24"   "24"   "25"   "25"   "26"  
## [151] "28"   "29"   "30"   "30"   "31"   "32"   "32"   "33"   "36"   "36"  
## [161] "38"   "38"   "39"   "40"   "41"   "41"   "42"   "42"   "43"   "5"   
## [171] "5"    "6"    "8"    "9"    "9"    "K001" "K002" "K003" "K004" "K006"
## [181] "K007" "K008" "K010" "K011" "K012" "K013" "K015" "11"   "15"   "17"  
## [191] "19"   "21"   "23"   "24"   "25"   "26"   "28"   "29"   "29"   "30"  
## [201] "31"   "32"   "33"   "36"   "37"   "39"   "40"   "40"   "41"   "42"  
## [211] "6"    "8"    "K009" "K016" "K017" "K019" "K020" "K022" "K023" "K029"
## [221] "K031" "K043" "K044" "K045" "K049" "19"   "38"   "39"   "40"   "41"  
## [231] "42"   "K040" "K042" "K050" "K052" "K053" "K005" "K018" "K021" "K024"
## [241] "K025" "K026" "K027" "K028" "K030" "K031" "K032" "K036" "K039" "K047"
## [251] "K048"
nutri <- vegan::adonis2(BC_dist ~ basic_nutrition + bristol + sex+ id.x,
              permutations = 999, na.action=na.exclude, data = metadata)
nutri

9 PERMANOVA in stool

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_stool_j,
                             method="bray", weighted=F)
#extract metadata
metadata<- as(sample_data(ps_full_stool_j),"data.frame")

### stratified PERMANOVA per visit group
adonis2(formula = BC_dist ~ visit_sum + sex + age_y, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
### stratified PERMANOVA health vs CF
adonis2(formula = BC_dist ~ project + sex + age_y, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)

10 compare CF and control BC-distances for stool

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_stool_j,
                             method="bray", weighted=F)
BC_dist.stool <- as.matrix(BC_dist)
#BC_dist.stool[lower.tri(BC_dist.stool)] <- 0  # this is needed to remove the 0 latter, to keep only single pairs
BC_dist.stool_df <- reshape2::melt(BC_dist.stool)

tmp1 <- BC_dist.stool_df%>%
  filter(grepl("IMMP", Var1)) %>% #this keeps only control samples in Var 1
  filter(!grepl("IMMP", Var2))#%>%  #this filters control samples from Var 2, so that in the end only distances between COntrols and all CF samples are kept
  #filter(value!=0)#this removes the same sample distances, and the one from set 0 from above to keep a single pair

# merge with metadata to have visit_sums for CF samples
#extract metadata
metadata<- as(sample_data(ps_full_stool_j),"data.frame")
tmp1 <- tmp1 %>% 
  left_join(metadata, by=c("Var2"="x_sample_id")) %>% 
  mutate(comparison=paste(Var1, Var2, sep = "_")) %>% 
  distinct(comparison, .keep_all = T)

summary(tmp1$visit_sum)
##       1       2     3-5     6-7    8-10 Control 
##    1530    1170    3420    1800    1350       0
my_comp <- list(c("1","2"), c("1","3-5"), c("1","6-7"), c("1","8-10"))

# Calculate median and IQR
summary_stats <- tmp1 %>%
  group_by(visit_sum) %>%
  summarize(
    median = median(value),
    lower = quantile(value, 0.25),
    upper = quantile(value, 0.75)
  )

tmp1 %>% 
  ggplot(aes(visit_sum, value, fill=visit_sum))+
  geom_boxplot(outlier.shape = NA, alpha=0.7)+
  #geom_point()+
  theme_classic()+
  scale_fill_manual(values = visit_sum_palette)+
  ylab("BC distance: CF and Controls")+
  theme(legend.position = "none",text=element_text(size=20))+ #
  scale_x_discrete(labels= xlabels)+
  xlab("")+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.adj")

ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/BC_stool_controls.png", width = 6, height = 5)

summary(lmerTest::lmer(value~visit_sum + age_y+sex+ (1|id.x)+ (1|Var1), data = tmp1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ visit_sum + age_y + sex + (1 | id.x) + (1 | Var1)
##    Data: tmp1
## 
## REML criterion at convergence: -25960.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2239 -0.6299  0.0563  0.6663  3.9718 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Var1     (Intercept) 0.001064 0.03262 
##  id.x     (Intercept) 0.005001 0.07072 
##  Residual             0.003390 0.05823 
## Number of obs: 9270, groups:  Var1, 45; id.x, 35
## 
## Fixed effects:
##                 Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)    6.686e-01  2.549e-02  2.769e+01  26.233  < 2e-16 ***
## visit_sum2     1.548e-02  2.329e-03  9.206e+03   6.649 3.13e-11 ***
## visit_sum3-5  -1.033e-02  1.934e-03  3.064e+03  -5.340 9.96e-08 ***
## visit_sum6-7  -8.608e-03  2.404e-03  7.528e+02  -3.581 0.000364 ***
## visit_sum8-10 -3.248e-02  2.753e-03  3.748e+02 -11.798  < 2e-16 ***
## age_y          4.152e-03  7.890e-04  3.148e+01   5.263 9.68e-06 ***
## sex2          -4.800e-04  2.410e-02  2.086e+01  -0.020 0.984302    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vst_s2 vs_3-5 vs_6-7 v_8-10 age_y 
## visit_sum2   0.021                                   
## visit_sm3-5  0.167  0.545                            
## visit_sm6-7  0.288  0.459  0.662                     
## vist_sm8-10  0.355  0.413  0.628  0.655              
## age_y       -0.712 -0.082 -0.298 -0.453 -0.543       
## sex2        -0.403  0.008  0.032  0.046  0.058 -0.109
# below I plot the BC distance per patient and each control, with controls being on the x-axis, one can observe the consistency of BC distance per patient across controls, with visit_sum giving a distinct signal in each patient
tmp1 %>% 
  ggplot(aes(Var1, value, fill=visit_sum))+
  #geom_boxplot(outlier.shape = NA)+
  geom_point(aes(color=visit_sum), alpha=0.5)+
  theme_classic()+
  scale_color_manual(values = visit_sum_palette)+
  ylab("BC distance between CF and Controls")+
  theme(legend.position = "none",text=element_text(size=20))+ #
  #scale_x_discrete(labels= xlabels)+
  xlab("")+
  facet_wrap(~id.x)

11 stool linear model + boxplot

lm_boxplot_function <- function(dependent_variable, ylabel) {
  
  # Fit linear mixed-effects model
  print(summary(lmerTest::lmer(paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)"), data = tmp1)))
  lm <- summary(lmerTest::lmer(paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)"), data = tmp1))
  #lm <- summary(lmerTest::lmer(value ~ visit_sum + sex + age_y + (1|id), data = tmp1))
  # Extract coefficients and adjust p-values
  coefs <- data.frame(coef(lm))
  fdr <- p.adjust(coefs$Pr...t.., method = "fdr", n = nrow(coefs))
  
  # Create lm_stats table
  lm_stats <- bind_cols(coefs, fdr) %>%
    mutate(p = Pr...t..) %>%
    mutate(fdr = ...6) %>%
    select(-c(Pr...t.., ...6)) %>%
    rownames_to_column() %>%
    mutate(Months_after_ETI_start = rowname) %>%
    mutate(Months_after_ETI_start = case_when(
      Months_after_ETI_start == "(Intercept)" ~ "Baseline (Intercept)",
      Months_after_ETI_start == "visit_sum2" ~ "3 months",
      Months_after_ETI_start == "visit_sum3-5" ~ "6-12 months",
      Months_after_ETI_start == "visit_sum6-7" ~ "15-18 months",
      Months_after_ETI_start == "visit_sum8-10" ~ "21-24 months",
      Months_after_ETI_start %in% c("sex2", "age_y") ~ Months_after_ETI_start
    )) %>%
    mutate(fdr_star = case_when(
      fdr <= 0.001 ~ "***",
      fdr <= 0.01 ~ "**",
      fdr <= 0.05 ~ "*",
      fdr <= 0.1 ~ ".",
      fdr >= 0.1 ~ "ns"
    )) %>%
    select(-rowname) %>%
    select(Months_after_ETI_start, Estimate, Std..Error, df, t.value, p, fdr, fdr_star) %>%
    mutate(p = round(p, 5)) %>%
    mutate(fdr = round(fdr, 5))
  
  # Print lm_stats table
  print(lm_stats)
  
  # Save lm_stats table as HTML
  sjPlot::tab_df(lm_stats, title = paste("lmer(", dependent_variable, " ~ visit_sum + sex + age_y + (1|id), data=tmp1)"), file = paste("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/table_outputs/", dependent_variable, "_table.html", sep = ""))
  
  # Print boxplot
  group1 <- c("(Intercept)", rep("1", nrow(lm_stats) - 1))
  
  lm_sig <- lm_stats %>%
    bind_cols(group1) %>%
    mutate(group1 = ...9) %>%
    mutate(group2 = case_when(
      Months_after_ETI_start == "Baseline (Intercept)" ~ "1",
      Months_after_ETI_start == "3 months" ~ "2",
      Months_after_ETI_start == "6-12 months" ~ "3-5",
      Months_after_ETI_start == "15-18 months" ~ "6-7",
      Months_after_ETI_start == "21-24 months" ~ "8-10",
      Months_after_ETI_start %in% c("sex2", "age_y") ~ Months_after_ETI_start
    )) %>%
    filter(group1 != "(Intercept)") %>%
    filter(Months_after_ETI_start!="sex2") %>% 
    filter(Months_after_ETI_start!="age_y")
  
  max_y <- max(tmp1[[dependent_variable]], na.rm = TRUE)
  min_y <- min(tmp1[[dependent_variable]], na.rm = TRUE)
  
 # Calculate the adjusted y.position for stat labels
stat_labels_y_position <- max_y + 0.1 * (max_y - min_y)

  xlabels <- c("0", "3", "6-12", "15-18", "21-24")
  
# Print boxplot
boxplot <- tmp1 %>%
  ggplot(aes(get(dependent_variable), x = visit_sum)) +
  geom_boxplot(aes(fill = visit_sum), outlier.shape = NA, alpha = 0.7) +
  scale_fill_manual(values = visit_sum_palette) +
  theme_classic() +
  ylab(ylabel) +
  xlab("Months from ETI treatment start") +
  scale_x_discrete(labels = xlabels) +
  theme(text = element_text(size = 18), legend.position = "none") +
   stat_pvalue_manual(lm_sig, label = "fdr_star", y.position = stat_labels_y_position, step.increase = 0.05)

return(list(lm_stats = lm_stats, boxplot = boxplot))
}

stool <- lm_boxplot_function("value", "Stool: BC distance - CF & Control")
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)")
##    Data: tmp1
## 
## REML criterion at convergence: -25960.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2239 -0.6299  0.0563  0.6663  3.9718 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Var1     (Intercept) 0.001064 0.03262 
##  id       (Intercept) 0.005001 0.07072 
##  Residual             0.003390 0.05823 
## Number of obs: 9270, groups:  Var1, 45; id, 35
## 
## Fixed effects:
##                 Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)    6.686e-01  2.549e-02  2.769e+01  26.233  < 2e-16 ***
## visit_sum2     1.548e-02  2.329e-03  9.206e+03   6.649 3.13e-11 ***
## visit_sum3-5  -1.033e-02  1.934e-03  3.064e+03  -5.340 9.96e-08 ***
## visit_sum6-7  -8.608e-03  2.404e-03  7.528e+02  -3.581 0.000364 ***
## visit_sum8-10 -3.248e-02  2.753e-03  3.748e+02 -11.798  < 2e-16 ***
## sex2          -4.800e-04  2.410e-02  2.086e+01  -0.020 0.984302    
## age_y          4.152e-03  7.890e-04  3.148e+01   5.263 9.68e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vst_s2 vs_3-5 vs_6-7 v_8-10 sex2  
## visit_sum2   0.021                                   
## visit_sm3-5  0.167  0.545                            
## visit_sm6-7  0.288  0.459  0.662                     
## vist_sm8-10  0.355  0.413  0.628  0.655              
## sex2        -0.403  0.008  0.032  0.046  0.058       
## age_y       -0.712 -0.082 -0.298 -0.453 -0.543 -0.109
## New names:
## • `` -> `...6`
##   Months_after_ETI_start      Estimate   Std..Error         df      t.value
## 1   Baseline (Intercept)  0.6686460360 0.0254882860   27.69322  26.23346414
## 2               3 months  0.0154825436 0.0023287270 9205.85956   6.64850084
## 3            6-12 months -0.0103261566 0.0019336476 3064.43496  -5.34024741
## 4           15-18 months -0.0086080236 0.0024037139  752.81380  -3.58113489
## 5           21-24 months -0.0324778153 0.0027528313  374.79532 -11.79796791
## 6                   sex2 -0.0004799697 0.0241038700   20.86432  -0.01991256
## 7                  age_y  0.0041525010 0.0007890069   31.47633   5.26294642
##         p     fdr fdr_star
## 1 0.00000 0.00000      ***
## 2 0.00000 0.00000      ***
## 3 0.00000 0.00000      ***
## 4 0.00036 0.00042      ***
## 5 0.00000 0.00000      ***
## 6 0.98430 0.98430       ns
## 7 0.00001 0.00001      ***
## New names:
## • `` -> `...9`
p4 <- stool$boxplot
stool$lm_stats
# Fit your linear mixed-effects model
# For example, replace 'YourModel' and 'YourData' with your model and data
model <-  lmerTest::lmer(value ~ visit_sum + sex + age_y + (1|id) + (1|Var1), data = tmp1)

# Extract coefficients
coefficients <- coef(model)

# Extract coefficients
coefficients <- coef(model)

# Plot coefficients
  coef_p <- coefplot::coefplot(model, intercept = FALSE, color = "black")
  coef_p_s<- coef_p+
    theme_classic()+
    scale_y_discrete(labels= c("3 m", "6-12 m", "15-18 m", "21-24 m", "sex", "age"))+
    coord_flip()+
    xlab("Estimate")+
    ggtitle("")+
    ylab("")+
    theme(text = element_text(size = 18))
  coef_p_s

12 combine plots

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
p1 <- p1+
  ggtitle("Throat")

p3 <- p3+
  ggtitle("Stool")

grid_arranged_t <- grid.arrange(
  p1+ggtitle(""),
  NULL,
  plot_x_th+theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks = element_blank())+ggtitle("Throat")+labs(tag = "A") ,
  plot_y_th+theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks = element_blank()),
  p3+ggtitle(""),
  NULL,
  plot_x+theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks = element_blank())+ggtitle("Stool")+labs(tag = "D") ,
  plot_y+theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks = element_blank()),
  coef_p_t + labs(tag = "B"),
  p2 + labs(tag = "C"),
  coef_p_s + labs(tag = "E"),
  p4 + labs(tag = "F"),
  legend,
  ncol = 3, nrow = 5,
  heights = c(1, 2, 1,2, 0.5),
  widths=c(2,0.5,1.5),
  layout_matrix = rbind(c(3,2,9),
                        c(1,4,10),
                        c(7,6,11),
                        c(5,8,12),
                        c(13,13,13)
                      )
)

# Print or save the arranged plot
ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/Manuscript_16S/Submission_CHM/Fig3.tiff",grid_arranged_t, dpi = 600, width = 16, height = 18)

# combine stats table

throat$lm_stats <- throat$lm_stats %>% 
  mutate(Sample="Throat") %>% 
  select(Sample, everything())

stool$lm_stats <- stool$lm_stats %>% 
  mutate(Sample="Stool") %>% 
  select(Sample, everything())

bc_stats <- rbind(throat$lm_stats, stool$lm_stats)
kable(bc_stats)
Sample Months_after_ETI_start Estimate Std..Error df t.value p fdr fdr_star
Throat Baseline (Intercept) 0.6295880 0.0246169 44.33130 25.5754517 0.00000 0.00000 ***
Throat 3 months -0.0422141 0.0046345 8252.29314 -9.1085902 0.00000 0.00000 ***
Throat 6-12 months -0.0277683 0.0041913 7721.41521 -6.6252986 0.00000 0.00000 ***
Throat 15-18 months -0.0403870 0.0047460 5152.04960 -8.5097037 0.00000 0.00000 ***
Throat 21-24 months -0.0197816 0.0049099 3025.76258 -4.0289486 0.00006 0.00008 ***
Throat sex2 0.0290054 0.0211133 30.49098 1.3737977 0.17952 0.17952 ns
Throat age_y 0.0018013 0.0007411 34.61843 2.4306602 0.02040 0.02380 *
Stool Baseline (Intercept) 0.6686460 0.0254883 27.69322 26.2334641 0.00000 0.00000 ***
Stool 3 months 0.0154825 0.0023287 9205.85956 6.6485008 0.00000 0.00000 ***
Stool 6-12 months -0.0103262 0.0019336 3064.43496 -5.3402474 0.00000 0.00000 ***
Stool 15-18 months -0.0086080 0.0024037 752.81380 -3.5811349 0.00036 0.00042 ***
Stool 21-24 months -0.0324778 0.0027528 374.79532 -11.7979679 0.00000 0.00000 ***
Stool sex2 -0.0004800 0.0241039 20.86432 -0.0199126 0.98430 0.98430 ns
Stool age_y 0.0041525 0.0007890 31.47633 5.2629464 0.00001 0.00001 ***
#write_csv(bc_stats, "/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/table_outputs/lmm_bc_distance_control.csv")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3     coefplot_1.2.8    vegan_2.6-4       lattice_0.20-45  
##  [5] permute_0.9-7     dendextend_1.17.1 ggpubr_0.4.0      readxl_1.3.1     
##  [9] naniar_1.0.0      lubridate_1.8.0   knitr_1.42        microbiome_1.16.0
## [13] janitor_2.1.0     magrittr_2.0.3    phyloseq_1.38.0   forcats_1.0.0    
## [17] stringr_1.5.0     dplyr_1.1.1       purrr_1.0.1       readr_2.1.2      
## [21] tidyr_1.3.0       tibble_3.2.1      ggplot2_3.4.1     tidyverse_1.3.1  
## [25] microViz_0.9.3   
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.1        systemfonts_1.0.4      plyr_1.8.7            
##   [4] igraph_1.4.1           splines_4.1.0          TH.data_1.1-0         
##   [7] GenomeInfoDb_1.30.1    digest_0.6.29          useful_1.2.6.1        
##  [10] foreach_1.5.2          htmltools_0.5.5        viridis_0.6.2         
##  [13] lmerTest_3.1-3         fansi_1.0.3            cluster_2.1.2         
##  [16] tzdb_0.3.0             Biostrings_2.62.0      modelr_0.1.8          
##  [19] sandwich_3.0-1         colorspace_2.0-3       rvest_1.0.2           
##  [22] textshaping_0.3.6      haven_2.4.3            xfun_0.38             
##  [25] crayon_1.5.1           RCurl_1.98-1.12        jsonlite_1.8.0        
##  [28] lme4_1.1-30            zoo_1.8-10             survival_3.2-13       
##  [31] iterators_1.0.14       ape_5.7-1              glue_1.6.2            
##  [34] gtable_0.3.3           emmeans_1.7.2          zlibbioc_1.40.0       
##  [37] XVector_0.34.0         sjstats_0.18.1         sjmisc_2.8.9          
##  [40] car_3.0-12             Rhdf5lib_1.16.0        BiocGenerics_0.40.0   
##  [43] abind_1.4-5            scales_1.2.1           mvtnorm_1.1-3         
##  [46] DBI_1.1.2              ggeffects_1.1.1        rstatix_0.7.0         
##  [49] Rcpp_1.0.9             performance_0.8.0      xtable_1.8-4          
##  [52] viridisLite_0.4.1      stats4_4.1.0           datawizard_0.2.3      
##  [55] httr_1.4.2             ellipsis_0.3.2         pkgconfig_2.0.3       
##  [58] farver_2.1.1           sass_0.4.5             dbplyr_2.1.1          
##  [61] utf8_1.2.2             effectsize_0.6.0.1     tidyselect_1.2.0      
##  [64] labeling_0.4.2         rlang_1.1.0            reshape2_1.4.4        
##  [67] munsell_0.5.0          cellranger_1.1.0       tools_4.1.0           
##  [70] cachem_1.0.6           cli_3.6.1              generics_0.1.3        
##  [73] pacman_0.5.1           sjlabelled_1.1.8       ade4_1.7-22           
##  [76] broom_1.0.4            evaluate_0.15          biomformat_1.22.0     
##  [79] fastmap_1.1.0          yaml_2.3.5             ragg_1.2.5            
##  [82] fs_1.6.3               visdat_0.6.0           nlme_3.1-155          
##  [85] xml2_1.3.3             compiler_4.1.0         rstudioapi_0.13       
##  [88] ggsignif_0.6.3         reprex_2.0.1           bslib_0.4.2           
##  [91] stringi_1.7.8          parameters_0.16.0      highr_0.9             
##  [94] Matrix_1.4-0           nloptr_2.0.3           multtest_2.50.0       
##  [97] vctrs_0.6.1            pillar_1.9.0           lifecycle_1.0.3       
## [100] rhdf5filters_1.6.0     jquerylib_0.1.4        estimability_1.3      
## [103] data.table_1.14.2      cowplot_1.1.1          bitops_1.0-7          
## [106] insight_0.16.0         R6_2.5.1               IRanges_2.28.0        
## [109] codetools_0.2-18       boot_1.3-28            MASS_7.3-55           
## [112] assertthat_0.2.1       rhdf5_2.38.1           withr_2.5.0           
## [115] multcomp_1.4-18        S4Vectors_0.32.4       GenomeInfoDbData_1.2.7
## [118] mgcv_1.8-38            bayestestR_0.11.5      parallel_4.1.0        
## [121] hms_1.1.1              grid_4.1.0             sjPlot_2.8.10         
## [124] coda_0.19-4            minqa_1.2.4            rmarkdown_2.21        
## [127] snakecase_0.11.0       carData_3.0-5          Rtsne_0.16            
## [130] numDeriv_2016.8-1.1    Biobase_2.54.0